*04_city_level_covariates.do

capture log close
clear
set linesize 255

global root =  "/disk/bulkw/nencka/schooling_pandemic/2021_10_18_final/"
global input   "$root/Input"
global scripts "$root/Scripts"
global output  "$root/Output"
global temp    "$root/Temp"
global log     "$root/Log"
global figures "$root/Figures"

set scheme plotplainblind


log using "$log/04_city_level_covariates", replace text


*Load 1910 data with cities

use sex race birthyr occscore_base bpl occ1950_base ind1950_base school statefip mcd ///
	using "$temp/data1910_1918pandemic.dta", replace

gen count = 1 

gen black = 1 if inlist(race,200,210)
replace black = 0 if mi(black)


*Create auxiliary dataset of the number of people in each place
preserve

collapse (sum) count, by(statefip mcd)
save "$temp/places_in_census_1910.dta", replace
restore



*Merge on city closure to 1910 census 

merge m:1 statefip mcd using "$temp/school_closures_towns_1920.dta"


preserve
keep if inlist(_merge,2,3)
tab mcd _merge, m
restore

keep if inlist(_merge,1,3)

*Focus on our sample
drop if mi(in_daysclosed_sample)
drop _merge

*Merge on state closure data
merge m:1 statefip using "$temp/state_closures.dta"

assert _m != 1
drop if _m == 2

drop _merge


*Merge on mortality data
merge m:1 statefip mcd using "$temp/flu_mortality.dta"
drop if _m == 2
drop _merge



gen age = 1910 - birthyr 
tab age sex, m

	gen top_occupation = 1 if occscore_base > 25 & ~mi(occscore_base)
	replace top_occupation = 0 if occscore_base <= 25  & ~mi(occscore_base)
	replace top_occupation = . if !(inrange(age,25,54) & sex==1)

	replace occscore_base = . if !(inrange(age,25,54) & sex==1)

	gen foreignb = 1 if bpl > 13000 & bpl < 90000  & ~mi(bpl)
	replace foreignb = 0 if mi(foreignb) & ~mi(bpl)

	gen in_lf = 1 if occ1950_base <=970 & ~mi(occ1950_base)
	replace in_lf = 0 if mi(in_lf) & ~mi(occ1950_base)


	gen in_school  = 1 if school == 2
	replace in_school = 0 if school == 1
	replace in_school = . if (age < 6 | age > 18) & ~mi(age)

	gen in_school_6_10  = 1 if in_school == 1 & inrange(age,6,10)
	replace in_school_6_10 = 0 if in_school == 0  & inrange(age,6,10)

	gen in_school_11_14  = 1 if in_school == 1 & inrange(age,11,14)
	replace in_school_11_14 = 0 if in_school == 0  & inrange(age,11,14)

	gen in_school_15_18  = 1 if in_school  == 1 & inrange(age,15,18)
	replace in_school_15_18 = 0 if in_school == 0  & inrange(age,15,18)

	tab age in_school_15_18, mi 

	gen male_school = 1 if sex == 1 & in_school == 1
	replace male_school = 0 if sex == 1 & in_school == 0

	gen female_school = 1 if sex == 2 & in_school == 1
	replace female_school = 0 if sex == 2 & in_school == 0

	gen med_ind = 1 if inlist(ind1950, 868, 869)
	replace med_ind = 0 if mi(med_ind)

	drop school 
	rename in_school school 


gcollapse (sum) count (mean) black age occscore_base top_occupation foreignb in_lf ///
	school in_school_6_10 in_school_11_14 in_school_15_18 male_school ///
	female_school med_ind  (median) days_closed excess_death_ratio order_or_rec in_daysclosed_sample, by(statefip mcd) 




sum excess_death_ratio, d
replace excess_death_ratio = . if excess_death_ratio> r(p99) & !missing(excess_death_ratio)
replace excess_death_ratio = .  if excess_death_ratio< r(p1) & !missing(excess_death_ratio)

su days_closed

preserve

foreach var of varlist black age occscore_base top_occupation foreignb in_lf ///
	school in_school_6_10 in_school_11_14 in_school_15_18 male_school ///
	female_school med_ind {

	rename `var' `var'_avg
}

sort statefip mcd
save "$temp/city_covariates.dta", replace 

restore


cd "$figures"

*Scatterplot

	scatter days_closed excess_death_ratio [w=count], msymbol(circle_hollow) ytitle("Days schools closed") xtitle("Excess flu deaths ratio") xsc(r(-1 5))
		graph export "scatter_deaths_days.eps", replace 
		!convert -density 2000 scatter_deaths_days.eps scatter_deaths_days.png
		!convert -density 2000 scatter_deaths_days.eps scatter_deaths_days.pdf

*Log count 
	su count 
	replace count = ln(count)

*Standardize variables
	foreach var of varlist count black age occscore_base top_occupation school in_school_6_10 in_school_11_14 in_school_15_18 foreignb in_lf male_school female_school med_ind order_or_rec {
		egen std_`var' = std(`var')
	}



	*No SY FE, with zeros
		reg std_count days_closed, robust 
			estimates store pop 
		reg std_black days_closed, robust 
			estimates store black 
		reg std_occscore_base days_closed, robust 
			estimates store occscore_base 
		reg std_top_occupation days_closed, robust 
			estimates store top_occupation 
		reg std_school days_closed, robust 
			estimates store school 
		reg std_in_school_6_10 days_closed, robust 
			estimates store in_school_6_10 
		reg std_in_school_11_14 days_closed, robust 
			estimates store in_school_11_14 
		reg std_in_school_15_18 days_closed, robust 
			estimates store in_school_15_18 
		reg std_male_school days_closed, robust 
			estimates store mschool 
		reg std_female_school days_closed, robust 
			estimates store fschool 
		reg std_med_ind days_closed, robust 
			estimates store med 
		reg std_foreignb days_closed, robust 
			estimates store foreignb 
		reg std_in_lf days_closed, robust 
			estimates store in_lf 
		reg std_order_or_rec days_closed, robust 
			estimates store no_order 

	coefplot  (pop \ black \ in_lf \ occscore_base \ top_occupation \ med \ school \ in_school_6_10 \ in_school_11_14 \ in_school_15_18 \ mschool \ fschool \ foreignb \ no_order), drop(_cons) aseq swapnames level(95) xline(0) ///
			xtitle(Coefficient on days closed) ///
			coeflabels(pop = "Population" black = "Share black" in_lf = "Share in labor force" ///
				occscore_base = "Occscore" top_occupation = "Share top occupation" school = "In-school" ///
				in_school_6_10 = "In-school (6-10)" in_school_11_14 = "In-school (11-14)" in_school_15_18 = "In-school (15-18)" ///
				mschool = "In-school (men)" fschool = "In-school (women)" med = "Share in medical field" ///
				foreignb = "Share foreign-born" no_order = "Had state closure reccomendation")

			graph export "predictors_closing_zeros.eps", replace 
			!convert -density 2000 predictors_closing_zeros.eps predictors_closing_zeros.png
			!convert -density 2000 predictors_closing_zeros.eps predictors_closing_zeros.pdf

log close

*Create histogram of closure lengths


hist days_closed, xtitle(Days schools closed)
	
	graph export  "days_closed_hist.eps", replace
			!convert -density 2000 days_closed_hist.eps days_closed_hist.png
			!convert -density 2000 days_closed_hist.eps days_closed_hist.pdf

